Advanced Data Structures

Table of Contents

Trie

A trie is an efficient information-retrieval data structure. Using a trie, search complexity can be brought down to the optimal limit (the key length). If we store keys in a binary search tree, a well-balanced BST needs time proportional to M * log N, where M is the maximum string length and N is the number of keys in the tree. Using a trie, we can search for a key in O(M) time. However, the penalty is the trie's storage requirements.

simple Trie implementation in C

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Number of elements in a true array (not a pointer). The expansion and the
 * argument are parenthesized so the macro behaves as a single operand when it
 * appears inside a larger expression such as `x / ARRAY_SIZE(a)`. */
#define ARRAY_SIZE(a)  (sizeof(a) / sizeof((a)[0]))

/* Number of child slots in every trie node. */
#define ALPHABET_SIZE (26)
/* only use key a-z */
/* Map a key character to its child-slot index ('a' -> 0). The argument is
 * parenthesized so expressions such as `cond ? x : y` can be passed safely. */
#define CHAR_TO_INDEX(c)   ((c) - 'a')
/* Value stored in the node that terminates an inserted key. */
#define LEAF_VALUE   1

/* One trie node: a terminal marker plus one child link per alphabet slot. */
typedef struct TrieNode_ {
  /* LEAF_VALUE when an inserted key ends at this node, 0 otherwise. */
  int val;
  /* Child reached by each key character; NULL where no child exists. */
  struct TrieNode_ *children[ALPHABET_SIZE];
} TrieNode;

/* Handle for a whole trie; it holds only the pointer to the root node. */
typedef struct Trie_ {
  /* insert() and search() treat a NULL root as "not initialized yet". */
  TrieNode *root;
} Trie;

/* Allocate a new trie node that is not a leaf and has no children.
 *
 * On allocation failure this prints "malloc failed" to stderr and terminates
 * the process with EXIT_FAILURE, so the function never returns NULL.
 */
TrieNode *GetNode(void)
{
  TrieNode *node = malloc(sizeof *node);
  TrieNode **slot;

  if (!node) {
    fprintf(stderr, "malloc failed\n");
    exit (EXIT_FAILURE);
  }

  node->val = 0;
  /* Clear every child link explicitly rather than relying on zeroed memory. */
  for (slot = node->children; slot != node->children + ALPHABET_SIZE; ++slot)
    *slot = NULL;

  return node;
}

/* Make *pTrie an empty trie by attaching a newly allocated root node.
 * Whatever pTrie->root held before is overwritten without being released.
 */
void InitialTrie(Trie *pTrie)
{
  TrieNode *freshRoot = GetNode();

  pTrie->root = freshRoot;
}

/* Insert key into the trie, creating any nodes missing along its path.
 * If the key is already present, or is a prefix of a stored key, the
 * existing nodes are reused and only the final node is marked as a leaf.
 *
 * pTrie: trie to modify; a NULL root is initialized on demand.
 * key:   NUL-terminated string made only of the characters 'a'..'z'.
 *
 * A NULL argument, or a key containing any character outside 'a'..'z', is
 * rejected and leaves the trie untouched: such a character would otherwise
 * index outside the children array.
 */
void insert(Trie *pTrie, char key[])
{
  size_t length, level;
  int index;
  TrieNode *p;

  if(pTrie == NULL || key == NULL)
    return;

  length = strlen(key);

  /* Validate the whole key first so a bad key never leaves a partially
   * built path behind. */
  for(level = 0; level < length; level++){
    index = CHAR_TO_INDEX(key[level]);
    if(index < 0 || index >= ALPHABET_SIZE)
      return;
  }

  if(pTrie->root == NULL)
    InitialTrie(pTrie);
  p = pTrie->root;

  for(level = 0; level < length; level++){
    index = CHAR_TO_INDEX(key[level]);
    if(p->children[index] == NULL)
      p->children[index] = GetNode();
    p = p->children[index];
  }
  p->val = LEAF_VALUE;
}

/* Report whether key was previously inserted into the trie.
 *
 * pTrie: trie to query.
 * key:   NUL-terminated string to look up.
 *
 * Returns 1 if the key is present and 0 otherwise. The result is always
 * exactly 0 or 1, so it can be used directly as an array index.
 *
 * A NULL argument, an uninitialized trie (NULL root), or a key containing a
 * character outside 'a'..'z' yields 0; the range check keeps such characters
 * from indexing outside the children array.
 */
int search(Trie *pTrie, char key[])
{
  size_t length, level;
  int index;
  TrieNode *p;

  if(pTrie == NULL || key == NULL || pTrie->root == NULL)
    return 0;

  p = pTrie->root;
  length = strlen(key);
  for(level = 0; level < length; level++){
    index = CHAR_TO_INDEX(key[level]);
    if(index < 0 || index >= ALPHABET_SIZE)
      return 0;
    if(p->children[index] == NULL)
      return 0;
    p = p->children[index];
  }
  /* p is never NULL here; only nodes marked as leaves end a stored key. */
  return p->val != 0;
}

/* Demo driver: builds a trie from a fixed word list, then prints for each of
 * four probe words whether it is present. Command-line arguments are not
 * used. Always returns 0.
 */
int main(int argc, char *argv[])
{
  char words[][8] = {"the", "a", "there", "answer", "any", "by", "bye", "their"};
  char probes[][8] = {"the", "these", "their", "thaw"};
  /* Indexed by the 0/1 result of search(). */
  char verdict[][32] = {"Not present in trie", "Present in trie"};
  const size_t wordCount = sizeof(words) / sizeof(words[0]);
  const size_t probeCount = sizeof(probes) / sizeof(probes[0]);
  Trie dict;
  size_t w, q;

  InitialTrie(&dict);

  /* Populate the trie. */
  for (w = 0; w < wordCount; ++w)
    insert(&dict, words[w]);

  /* Look up each probe word and report the outcome. */
  for (q = 0; q < probeCount; ++q) {
    int found = search(&dict, probes[q]);

    printf("%s --- %s\n", probes[q], verdict[found]);
  }

  return 0;
}

Trie implementation in C++


Rope

Rope is a data structure composed of smaller strings that is used for efficiently storing and manipulating a very long string.

Rope implementation in C++


Skip list

A skip list is a data structure that allows fast search within an ordered sequence of elements. Fast search is made possible by maintaining a linked hierarchy of subsequences, with each successive subsequence skipping over fewer elements than the previous one.

William Pugh in "Skip Lists: A Probabilistic Alternative to Balanced Trees"

Redis's sorted set implementation uses a skip list

R-tree

R-trees are tree data structures used for spatial access methods, i.e., for indexing multi-dimensional information such as geographical coordinates, rectangles or polygons.

Author: Shi Shougang

Created: 2015-03-05 Thu 23:21

Emacs 24.3.1 (Org mode 8.2.10)

Validate